
** Prepare serially correlated and non-serially correlated fake data

* Output folder for the Monte Carlo files and folder holding the source data.
global serial_corr ../china/estimation/serial_correlation
global data ../china/data

* 50 importers, 50 exporters.  So for each MC run, I will need 50*50=2,500 prices and Gumbel errors, and 50 supplier qualities


* Mean and sd of the observed log prices for HS code 8525209070, by year.
use $data/all0506_hs10, clear
keep if hs=="8525209070"
summ log_price if year==2005, d
local price2005_mean=`r(mean)'
local price2005_sd=`r(sd)'
summ log_price if year==2006, d
local price2006_mean=`r(mean)'
local price2006_sd=`r(sd)'
summ lambda, d

* One simulated price per importer-exporter pair (50 x 50 grid) and year.
clear
set obs 50
gen imp=_n
expand 50
bysort imp: gen exp=_n
set seed 80
gen price2005_sim=rnormal(`price2005_mean',`price2005_sd')
* Fix: the 2006 draw previously reused the 2005 mean and sd, which left the
* 2006 moments computed above unused.
gen price2006_sim=rnormal(`price2006_mean',`price2006_sd')
save $serial_corr/price, replace

* Simulated supplier quality: one normal draw per exporter, using the mean
* and sd of lambda observed in 2006 for HS code 8525209070.
use $data/all0506_hs10, clear
keep if hs=="8525209070"
summarize lambda if year==2006, detail
local q_mu=`r(mean)'
local q_sigma=`r(sd)'

clear
set seed 80
set obs 50
generate exp=_n
generate lambda2006_sim=rnormal(`q_mu',`q_sigma')
save $serial_corr/quality, replace


set more off
forv x=1/50 {

* Convert the three error-draw files of the current run from CSV to Stata
* format.  Each file is read from its own subfolder, its single column v1 is
* renamed after the error term, and the result overwrites the scratch file
* of the same name.
foreach err in eps1 eps2 eps3 {
	import delimited $serial_corr/`err'/`err'_run`x'.csv, clear
	rename v1 `err'
	save $serial_corr/`err', replace
}

* Build the 50 x 50 importer-exporter grid; exporters are assigned to
* cities in consecutive groups of five.
clear
set obs 50
generate imp=_n
expand 50
bysort imp: generate exp=_n
generate city=ceil(exp/5)

* Attach the three error draws by row position.
foreach err in eps1 eps2 eps3 {
	merge 1:1 _n using $serial_corr/`err', nogenerate
}

* Attach the simulated pair-level prices and the exporter-level qualities.
merge m:1 imp exp using $serial_corr/price, nogenerate
merge m:1 exp using $serial_corr/quality, nogenerate

* Period-0 match: within each importer, the exporter with the largest eps1
* draw.  zeroeth_city flags every exporter located in the same city as that
* exporter.
* Fix: the maximum is stored as double.  egen defaults to float, and an
* equality test between a float maximum and an imported eps1 held in double
* precision can fail for every row, leaving the importer without a match.
bysort imp: egen double max_eps1=max(eps1)
gen zeroeth_partner=(eps1==max_eps1)
gen zeroeth_partner_city=(eps1==max_eps1)
bysort imp city: egen zeroeth_city=max(zeroeth_partner_city)
drop max_eps1
gen new_partner_1=(zeroeth_partner!=1)
gen new_city_1=(zeroeth_city!=1)

* Coefficients of the payoff function.
gen beta_p=-0.1056
gen beta_x=-2.6076
gen beta_c=-1.3319
gen xi=0.0278

* First-period payoff and match.  The switching terms are measured against
* the period-0 partner and city.
* NOTE(review): pi_2005 is built from price2006_sim, not price2005_sim - confirm this is intended.
gen pi_2005=beta_p*price2006_sim + beta_x*new_partner_1 + beta_c*new_city_1 + xi*lambda2006_sim + eps2

* pi_2005 and its maximum are both stored as float, so this comparison is exact.
bysort imp: egen max_pi_2005=max(pi_2005)
gen first_partner=(pi_2005==max_pi_2005)
gen first_partner_city=(pi_2005==max_pi_2005)
bysort imp city: egen first_city=max(first_partner_city)
drop max_pi_2005
gen new_partner=(first_partner!=1)
gen new_city=(first_city!=1)



* Deterministic part of the second-period payoff; the switching terms are
* measured against the first-period partner and city.
gen pi_det=beta_p*price2006_sim + beta_x*new_partner + beta_c*new_city + xi*lambda2006_sim

* Serially correlated case: the second-period error puts weight 0.3 on the
* first-period error eps2 and weight 0.7 on the fresh draw eps3.
preserve
gen eps2_big_sc=0.3*eps2 + 0.7*eps3
gen pi_big_sc=pi_det+eps2_big_sc

bysort imp: egen max_pi_big_sc=max(pi_big_sc)
gen second_partner=0
replace second_partner=1 if pi_big_sc==max_pi_big_sc

* Keep only the chosen exporters.  A stayer has a single row, which is
* duplicated so that every importer ends up with one row per year.
keep if first_partner==1 | second_partner==1
gen stayed=(first_partner==1 & second_partner==1)
tab stayed

expand 2 if stayed==1
sort imp exp
gen year=2005
* Stayers: the two rows are identical copies, so the second one becomes 2006.
by imp: replace year=2006 if stayed==1 & _n==2
* Switchers: the 2006 row has to be the second-period partner.  Labelling the
* second row in exporter order, as was done before, put the first partner in
* 2006 whenever its exporter id was the larger of the two.
replace year=2006 if stayed==0 & second_partner==1
tab stayed

save $serial_corr/runs/sc/run`x', replace
restore

* No serial correlation: the second-period error is the fresh draw eps3 only.
gen pi_no_sc=pi_det+eps3

bysort imp: egen max_pi_no_sc=max(pi_no_sc)
gen second_partner=0
replace second_partner=1 if pi_no_sc==max_pi_no_sc

* Keep only the chosen exporters.  A stayer has a single row, which is
* duplicated so that every importer ends up with one row per year.
keep if first_partner==1 | second_partner==1
gen stayed=(first_partner==1 & second_partner==1)
tab stayed
expand 2 if stayed==1
sort imp exp
gen year=2005
* Stayers: the two rows are identical copies, so the second one becomes 2006.
by imp: replace year=2006 if stayed==1 & _n==2
* Switchers: the 2006 row has to be the second-period partner.  Labelling the
* second row in exporter order, as was done before, put the first partner in
* 2006 whenever its exporter id was the larger of the two.
replace year=2006 if stayed==0 & second_partner==1

save $serial_corr/runs/no_sc/run`x', replace

}



** Serially correlated errors first **

****** Start snipping code from "clean_data_hs10" ******


set more off
forv x=1/50 {
global raw "../china/estimation/serial_correlation/raw/sc/raw`x'"

* Load the simulated choices of this run (serially correlated case).
use $serial_corr/runs/sc/run`x', clear
rename lambda2006_sim lambda
generate min_year=2005

* (1) Exporter-level mean of the 2005 price, saved for merging back later.
preserve
drop if year!=2005
bysort exp: egen price_minyear=mean(price2005_sim)
keep exp price_minyear
duplicates drop exp, force
save $raw/price_minyear, replace
restore

* (1.5) Exporter-level mean of the 2006 price, kept for the counterfactuals.
preserve
drop if year!=2006
bysort exp: egen price_maxyear=mean(price2006_sim)
keep exp price_maxyear
duplicates drop exp, force
save $raw/price_maxyear, replace
restore

* (2) City-level price change from 2005 to 2006, merged back on later.
*     citypre  = mean 2005 price over the 2005 rows of a city
*     citypost = mean 2006 price over the non-2005 rows of a city
*     citydiff = citypost - citypre
* The two by-group means replace the earlier per-city loops, which created
* four helper variables for every city to arrive at the same three columns.
preserve

* citydiff is declared first so the saved file keeps its column order.
gen citydiff=.
bysort city: egen citypre=mean(cond(year==2005, price2005_sim, .))
bysort city: egen citypost=mean(cond(year!=2005, price2006_sim, .))
replace citydiff=citypost-citypre

keep city citydiff citypre citypost
drop if citydiff==. /* Can be missing if a city is only found in one year. I have handchecked. */
duplicates drop city, force
save $raw/citydiff, replace
restore

* Grid of price state points: the overall minimum of the 2005 price, the
* interior quantile cut points among the min_year rows, and the overall
* maximum.  minp and maxp stay in the main data after the restore.
local N=5
local NP1 = `N'+1
local NM1 = `N'-1
egen minp=min(price2005_sim)
egen maxp=max(price2005_sim)

preserve
keep if year==min_year

*range statepts minp maxp `NP1'
pctile statep=price2005_sim, nquantiles(`N')
* Row 1 holds the minimum, the last grid row the maximum, and the rows in
* between hold the cut points shifted down by one row.
generate statepts=cond(_n==1, minp, cond(_n==`NP1', maxp, statep[_n-1]))
keep in 1/`NP1'
outfile statepts using $raw/statepts.raw, replace
restore

* Attach the exporter-level 2005 mean price.  Rows that exist only in the
* using file (_merge==2) are discarded; master-only rows (an exporter seen
* in 2006 only) are kept.
merge m:1 exp using $raw/price_minyear
keep if inlist(_merge, 1, 3)
drop _merge

* Same for the exporter-level 2006 mean price; master-only rows here are
* exporters seen in 2005 only.
merge m:1 exp using $raw/price_maxyear
keep if inlist(_merge, 1, 3)
drop _merge

* City information is attached now, and only rows whose city has a price
* difference are retained, so that no exporter is left without city info.
merge m:1 city using $raw/citydiff
keep if _merge==3
drop _merge




* Consecutive integer codes for cities, importers and exporters, plus their
* counts (stored in the locals C, M and X).
egen ct=group(city)
cap drop city_total
egen city_total=max(ct)
local C=city_total

egen m=group(imp)
egen importers=max(m)
local M=importers

egen xt=group(exp)
egen exporters=max(xt)
local X=exporters

* Next-period (t+1) values, taken from the row that follows the min_year row
* of the SAME importer.  Running the leads by importer leaves them missing
* when an importer lost its later row in the merges above; a plain lead on
* the sorted data would pick up the first row of the next importer instead.
bysort imp (year): gen ct1=ct[_n+1] if year==min_year
by imp: gen xt1=xt[_n+1] if year==min_year
by imp: gen pt1=price2006_sim[_n+1] if year==min_year
by imp: gen price_minyeart1=price_minyear[_n+1] if year==min_year /* This is necessary b/c the surviving price_minyear is that of the old exporter not the new */
by imp: gen price_maxyeart1=price_maxyear[_n+1] if year==min_year

by imp: gen lambdat1=lambda[_n+1] if year==min_year
drop if year!=min_year

* Expected price for stayers: the price paid at t plus the city price change.
* Fix: the original referred to pt, a variable that is never created.  With
* variable abbreviation on, Stata resolved it to pt1, which made w equal to
* minus citydiff for every stayer.
gen exp_price=price2005_sim+citydiff if stayed==1


* Save the city-level price columns a second time, keyed on ct1 and under
* new-prefixed names, so they can be attached to the next-period city.
preserve
rename citydiff newcitydiff
rename citypre newcitypre
rename citypost newcitypost
keep ct newcitydiff newcitypre newcitypost
duplicates drop ct, force
rename ct ct1
save $raw/newcitydiff, replace
restore

merge m:1 ct1 using $raw/newcitydiff
* _m==2 means that in the end, no one moved to that city.  (only existed in 2005 in final data) Just drop.
* _m==1 means that city was never there in 2005 to calculate a difference for (only existed in 2006)
* Thus can just drop unless _m==3
drop if _m!=3
drop _m

* Where the next-period exporter has no min_year price, fall back on the
* pre-period price of the next-period city.
forv j=1/`C' {
	replace price_minyeart1=newcitypre if ct1==`j' & price_minyeart1==.
}

/* Because of the difference between min and max year, this is not t1.  We need to replace the ones with
missing price_maxyear, as they won't be appended on after like the other variables. */

forv j=1/`C' {
	replace price_maxyear=newcitypost if ct1==`j' & price_maxyear==.
}


* For every exporter code i, p_i holds the min_year price of that exporter as
* a constant column.  When no remaining row supplies a price for code i, p_i
* is filled row by row with the pre-period price of the next-period city.
forv i=1/`X' {
	gen switchp`i'=price_minyear if xt==`i'
	egen p`i'=max(switchp`i')
	forv j=1/`C' {
		replace p`i'=newcitypre if p`i'==. & ct1==`j'
		}
}

* Expected price for switchers: the p_i column of the exporter they move to
* plus the price change of the city they move to.
forv i=1/`X' {
	replace exp_price=p`i'+ newcitydiff if stayed==0 & xt1==`i'
}

* w is the gap between the realised next-period price and the expected one.
gen w=pt1-exp_price
/* The only missing observations are those that:
	-Had no information about the exporter before going there (no pre-price)
	-No information about the city before going there (no pre-city price)
At this point, I think it has been cleaned a lot- XX out of XX observations are missing.
OK drop them at this point. */
drop if w==.


save $raw/final, replace


** Standardize the exporter codes (k = x) and then the city codes (k = c)
** so that they run from 1 to the number of codes actually present.
* For each of the two, the current-period and next-period codes found in
* final are pooled, de-duplicated and renumbered with egen group.  The
* lookup table is saved twice: once keyed on the current-period code and
* once keyed on the next-period code.
foreach k in x c {
	use $raw/final, clear
	keep `k't1
	rename `k't1 `k't
	save $raw/`k't1, replace

	use $raw/final, clear
	keep `k't
	append using $raw/`k't1
	duplicates drop `k't, force
	egen `k'`k't=group(`k't)
	save $raw/`k'`k't, replace
	rename `k't `k't1
	rename `k'`k't `k'`k't1
	save $raw/`k'`k't1, replace
}

* Attach the standardized codes to final, in the order xt, xt1, ct, ct1.
* Lookup rows without a counterpart in final are discarded.
use $raw/final, clear
foreach k in x c {
	foreach s in t t1 {
		merge m:1 `k'`s' using $raw/`k'`k'`s'
		drop if _merge==2
		drop _merge
	}
}





* The steps below pad the data with placeholder rows so that the file
* written at the end contains every next-period exporter code from 1 to
* the largest standardized code, with w set to 0 on the placeholders.

* For the case when there is no xxt=1
* One extra row with xxt1 equal to 1 is appended and flagged in fakeone; it
* is removed again when another row already carries that code.
local o=_N
local p=`o'+1
set obs `p'
replace xxt1=1 in `p'
gen fakeone=0
replace fakeone=1 in `p'
sort xxt1
bysort xxt1: gen nobs=_N
drop if fakeone==1 & nobs>1
drop nobs

* For the case when there is no xxt=MAX
* Same idea for the largest code: the extra row takes the maximum of xxt as
* its xxt1 and is removed when that code is already present.
local o=_N
local p=`o'+1
set obs `p'

egen xmax=max(xxt)
local k=xmax

replace xxt1=xmax in `p'
gen fakemax=0
replace fakemax=1 in `p'
sort xxt1
bysort xxt1: gen nobs=_N
drop if fakemax==1 & nobs>1
drop nobs xmax




* Fill the gaps between consecutive codes.  xxt1_diff is the distance to the
* next code in sorted order; expand turns a distance of d into d rows, the
* added ones flagged in fake, and the running count obs moves each added
* row onto one of the missing codes.
sort xxt1
gen xxt1_diff=xxt1[_n+1]-xxt1
expand xxt1_diff, gen(fake)
sort xxt1 fake
bysort xxt1 fake: gen obs=_n
replace xxt1=xxt1+obs if fake==1
* The two appended boundary rows count as placeholders too.
replace fake=1 if fakeone==1
replace fake=1 if fakemax==1
replace w=0 if fake==1
sort xxt1
outfile xxt1 w using $raw/w.raw, replace

* Remove the placeholder rows again; rows without a standardized
* current-period exporter code are dropped as well.
drop if fake==1
drop if xxt==.

local N=5
local NP1 = `N'+1
local NM1 = `N'-1

* Price state of each importer: quantile bin of the price paid at t.  Only
* min_year rows remain at this point, so that price is price2005_sim.
* Fix: the original used pt, a variable that is never created and that
* variable abbreviation resolved to pt1, the next-period price.
xtile imppricestate=price2005_sim, nquantiles(`N')


sort xxt

* Every importer gets the same share; the importer count M was taken before
* any rows were dropped.
gen firmshare=1/`M'

outfile imppricestate xxt xxt1 using $raw/imp.raw, replace
outfile firmshare using $raw/firmshare.raw, replace

save $raw/final2, replace

* Recast the next-period columns under the current-period names so they can
* be stacked underneath final2.
keep xxt1 price_minyeart1 price_maxyeart1 cct1 newcitydiff lambdat1
rename (xxt1 price_minyeart1 price_maxyeart1 cct1 newcitydiff lambdat1) (xxt price_minyear price_maxyear cct citydiff lambda)
save $raw/t1, replace

use $raw/final2, clear
append using $raw/t1

* One row per standardized exporter code for the exporter-level files,
* then one row per standardized city code for the city-level file.
sort xxt
duplicates drop xxt, force
outfile price_minyear using $raw/price_minyear.raw, replace
outfile price_maxyear using $raw/price_maxyear.raw, replace
outfile cct lambda using $raw/cities.raw, replace
sort cct
duplicates drop cct, force
outfile citydiff using $raw/citydiff.raw, replace

}


* Count the distinct standardized exporter codes of every run (serially
* correlated case) and store the count next to the run number.
forv x=1/50 {
	global raw "../china/estimation/serial_correlation/raw/sc/raw`x'"
	use $raw/final2, clear
	append using $raw/t1
	sort xxt
	duplicates drop xxt, force
	generate nobs=_N
	generate run=`x'
	keep nobs run
	save $raw/nobs, replace
}

* Stack the per-run counts, keep one line per run and write them out.
use "../china/estimation/serial_correlation/raw/sc/raw1/nobs", clear
forv x=2/50 {
	global raw "../china/estimation/serial_correlation/raw/sc/raw`x'"
	append using $raw/nobs
}
duplicates drop run, force
outfile run nobs using $serial_corr/serial_corr_nobs.raw, replace


********************************************

** Now the case without serially correlated errors **

****** Start snipping code from "clean_data_hs10" ******


set more off
forv x=1/50 {
global raw "../china/estimation/serial_correlation/raw/no_sc/raw`x'"

* Load the simulated choices of this run (no serial correlation).
use $serial_corr/runs/no_sc/run`x', clear
rename lambda2006_sim lambda
generate min_year=2005

* (1) Exporter-level mean of the 2005 price, saved for merging back later.
preserve
drop if year!=2005
bysort exp: egen price_minyear=mean(price2005_sim)
keep exp price_minyear
duplicates drop exp, force
save $raw/price_minyear, replace
restore

* (1.5) Exporter-level mean of the 2006 price, kept for the counterfactuals.
preserve
drop if year!=2006
bysort exp: egen price_maxyear=mean(price2006_sim)
keep exp price_maxyear
duplicates drop exp, force
save $raw/price_maxyear, replace
restore

* (2) City-level price change from 2005 to 2006, merged back on later.
*     citypre  = mean 2005 price over the 2005 rows of a city
*     citypost = mean 2006 price over the non-2005 rows of a city
*     citydiff = citypost - citypre
* The two by-group means replace the earlier per-city loops, which created
* four helper variables for every city to arrive at the same three columns.
preserve

* citydiff is declared first so the saved file keeps its column order.
gen citydiff=.
bysort city: egen citypre=mean(cond(year==2005, price2005_sim, .))
bysort city: egen citypost=mean(cond(year!=2005, price2006_sim, .))
replace citydiff=citypost-citypre

keep city citydiff citypre citypost
drop if citydiff==. /* Can be missing if a city is only found in one year. I have handchecked. */
duplicates drop city, force
save $raw/citydiff, replace
restore

* Grid of price state points: the overall minimum of the 2005 price, the
* interior quantile cut points among the min_year rows, and the overall
* maximum.  minp and maxp stay in the main data after the restore.
local N=5
local NP1 = `N'+1
local NM1 = `N'-1
egen minp=min(price2005_sim)
egen maxp=max(price2005_sim)

preserve
keep if year==min_year

*range statepts minp maxp `NP1'
pctile statep=price2005_sim, nquantiles(`N')
* Row 1 holds the minimum, the last grid row the maximum, and the rows in
* between hold the cut points shifted down by one row.
generate statepts=cond(_n==1, minp, cond(_n==`NP1', maxp, statep[_n-1]))
keep in 1/`NP1'
outfile statepts using $raw/statepts.raw, replace
restore

* Attach the exporter-level 2005 mean price.  Rows that exist only in the
* using file (_merge==2) are discarded; master-only rows (an exporter seen
* in 2006 only) are kept.
merge m:1 exp using $raw/price_minyear
keep if inlist(_merge, 1, 3)
drop _merge

* Same for the exporter-level 2006 mean price; master-only rows here are
* exporters seen in 2005 only.
merge m:1 exp using $raw/price_maxyear
keep if inlist(_merge, 1, 3)
drop _merge

* City information is attached now, and only rows whose city has a price
* difference are retained, so that no exporter is left without city info.
merge m:1 city using $raw/citydiff
keep if _merge==3
drop _merge




* Consecutive integer codes for cities, importers and exporters, plus their
* counts (stored in the locals C, M and X).
egen ct=group(city)
cap drop city_total
egen city_total=max(ct)
local C=city_total

egen m=group(imp)
egen importers=max(m)
local M=importers

egen xt=group(exp)
egen exporters=max(xt)
local X=exporters

* Next-period (t+1) values, taken from the row that follows the min_year row
* of the SAME importer.  Running the leads by importer leaves them missing
* when an importer lost its later row in the merges above; a plain lead on
* the sorted data would pick up the first row of the next importer instead.
bysort imp (year): gen ct1=ct[_n+1] if year==min_year
by imp: gen xt1=xt[_n+1] if year==min_year
by imp: gen pt1=price2006_sim[_n+1] if year==min_year
by imp: gen price_minyeart1=price_minyear[_n+1] if year==min_year /* This is necessary b/c the surviving price_minyear is that of the old exporter not the new */
by imp: gen price_maxyeart1=price_maxyear[_n+1] if year==min_year

by imp: gen lambdat1=lambda[_n+1] if year==min_year
drop if year!=min_year

* Expected price for stayers: the price paid at t plus the city price change.
* Fix: the original referred to pt, a variable that is never created.  With
* variable abbreviation on, Stata resolved it to pt1, which made w equal to
* minus citydiff for every stayer.
gen exp_price=price2005_sim+citydiff if stayed==1


* Save the city-level price columns a second time, keyed on ct1 and under
* new-prefixed names, so they can be attached to the next-period city.
preserve
rename citydiff newcitydiff
rename citypre newcitypre
rename citypost newcitypost
keep ct newcitydiff newcitypre newcitypost
duplicates drop ct, force
rename ct ct1
save $raw/newcitydiff, replace
restore

merge m:1 ct1 using $raw/newcitydiff
* _m==2 means that in the end, no one moved to that city.  (only existed in 2005 in final data) Just drop.
* _m==1 means that city was never there in 2005 to calculate a difference for (only existed in 2006)
* Thus can just drop unless _m==3
drop if _m!=3
drop _m

* Where the next-period exporter has no min_year price, fall back on the
* pre-period price of the next-period city.
forv j=1/`C' {
	replace price_minyeart1=newcitypre if ct1==`j' & price_minyeart1==.
}

/* Because of the difference between min and max year, this is not t1.  We need to replace the ones with
missing price_maxyear, as they won't be appended on after like the other variables. */

forv j=1/`C' {
	replace price_maxyear=newcitypost if ct1==`j' & price_maxyear==.
}


* For every exporter code i, p_i holds the min_year price of that exporter as
* a constant column.  When no remaining row supplies a price for code i, p_i
* is filled row by row with the pre-period price of the next-period city.
forv i=1/`X' {
	gen switchp`i'=price_minyear if xt==`i'
	egen p`i'=max(switchp`i')
	forv j=1/`C' {
		replace p`i'=newcitypre if p`i'==. & ct1==`j'
		}
}

* Expected price for switchers: the p_i column of the exporter they move to
* plus the price change of the city they move to.
forv i=1/`X' {
	replace exp_price=p`i'+ newcitydiff if stayed==0 & xt1==`i'
}

* w is the gap between the realised next-period price and the expected one.
gen w=pt1-exp_price
/* The only missing observations are those that:
	-Had no information about the exporter before going there (no pre-price)
	-No information about the city before going there (no pre-city price)
At this point, I think it has been cleaned a lot- XX out of XX observations are missing.
OK drop them at this point. */
drop if w==.


save $raw/final, replace


** Standardize the exporter codes (k = x) and then the city codes (k = c)
** so that they run from 1 to the number of codes actually present.
* For each of the two, the current-period and next-period codes found in
* final are pooled, de-duplicated and renumbered with egen group.  The
* lookup table is saved twice: once keyed on the current-period code and
* once keyed on the next-period code.
foreach k in x c {
	use $raw/final, clear
	keep `k't1
	rename `k't1 `k't
	save $raw/`k't1, replace

	use $raw/final, clear
	keep `k't
	append using $raw/`k't1
	duplicates drop `k't, force
	egen `k'`k't=group(`k't)
	save $raw/`k'`k't, replace
	rename `k't `k't1
	rename `k'`k't `k'`k't1
	save $raw/`k'`k't1, replace
}

* Attach the standardized codes to final, in the order xt, xt1, ct, ct1.
* Lookup rows without a counterpart in final are discarded.
use $raw/final, clear
foreach k in x c {
	foreach s in t t1 {
		merge m:1 `k'`s' using $raw/`k'`k'`s'
		drop if _merge==2
		drop _merge
	}
}





* The steps below pad the data with placeholder rows so that the file
* written at the end contains every next-period exporter code from 1 to
* the largest standardized code, with w set to 0 on the placeholders.

* For the case when there is no xxt=1
* One extra row with xxt1 equal to 1 is appended and flagged in fakeone; it
* is removed again when another row already carries that code.
local o=_N
local p=`o'+1
set obs `p'
replace xxt1=1 in `p'
gen fakeone=0
replace fakeone=1 in `p'
sort xxt1
bysort xxt1: gen nobs=_N
drop if fakeone==1 & nobs>1
drop nobs

* For the case when there is no xxt=MAX
* Same idea for the largest code: the extra row takes the maximum of xxt as
* its xxt1 and is removed when that code is already present.
local o=_N
local p=`o'+1
set obs `p'

egen xmax=max(xxt)
local k=xmax

replace xxt1=xmax in `p'
gen fakemax=0
replace fakemax=1 in `p'
sort xxt1
bysort xxt1: gen nobs=_N
drop if fakemax==1 & nobs>1
drop nobs xmax




* Fill the gaps between consecutive codes.  xxt1_diff is the distance to the
* next code in sorted order; expand turns a distance of d into d rows, the
* added ones flagged in fake, and the running count obs moves each added
* row onto one of the missing codes.
sort xxt1
gen xxt1_diff=xxt1[_n+1]-xxt1
expand xxt1_diff, gen(fake)
sort xxt1 fake
bysort xxt1 fake: gen obs=_n
replace xxt1=xxt1+obs if fake==1
* The two appended boundary rows count as placeholders too.
replace fake=1 if fakeone==1
replace fake=1 if fakemax==1
replace w=0 if fake==1
sort xxt1
outfile xxt1 w using $raw/w.raw, replace

* Remove the placeholder rows again; rows without a standardized
* current-period exporter code are dropped as well.
drop if fake==1
drop if xxt==.

local N=5
local NP1 = `N'+1
local NM1 = `N'-1

* Price state of each importer: quantile bin of the price paid at t.  Only
* min_year rows remain at this point, so that price is price2005_sim.
* Fix: the original used pt, a variable that is never created and that
* variable abbreviation resolved to pt1, the next-period price.
xtile imppricestate=price2005_sim, nquantiles(`N')


sort xxt

* Every importer gets the same share; the importer count M was taken before
* any rows were dropped.
gen firmshare=1/`M'

outfile imppricestate xxt xxt1 using $raw/imp.raw, replace
outfile firmshare using $raw/firmshare.raw, replace

save $raw/final2, replace

* Recast the next-period columns under the current-period names so they can
* be stacked underneath final2.
keep xxt1 price_minyeart1 price_maxyeart1 cct1 newcitydiff lambdat1
rename (xxt1 price_minyeart1 price_maxyeart1 cct1 newcitydiff lambdat1) (xxt price_minyear price_maxyear cct citydiff lambda)
save $raw/t1, replace

use $raw/final2, clear
append using $raw/t1

* One row per standardized exporter code for the exporter-level files,
* then one row per standardized city code for the city-level file.
sort xxt
duplicates drop xxt, force
outfile price_minyear using $raw/price_minyear.raw, replace
outfile price_maxyear using $raw/price_maxyear.raw, replace
outfile cct lambda using $raw/cities.raw, replace
sort cct
duplicates drop cct, force
outfile citydiff using $raw/citydiff.raw, replace

}


* Count the distinct standardized exporter codes of every run (no serial
* correlation) and store the count next to the run number.
forv x=1/50 {
	global raw "../china/estimation/serial_correlation/raw/no_sc/raw`x'"
	use $raw/final2, clear
	append using $raw/t1
	sort xxt
	duplicates drop xxt, force
	generate nobs=_N
	generate run=`x'
	keep nobs run
	save $raw/nobs, replace
}



